{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Using stochastic gradient descent for regression\n",
    "\n",
    "Fit an `SGDRegressor` on a synthetic one-million-row regression problem and inspect the residuals on held-out rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import make_regression\n",
    "import numpy as np\n",
    "\n",
    "# Synthetic regression problem. random_state pins the generated data so\n",
    "# that Restart & Run All reproduces the same X and y.\n",
    "X, y = make_regression(n_samples=int(1e6), random_state=0)  # 1,000,000 rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Total size of X in bytes, with thousands separators. The call form of\n",
    "# print behaves the same on Python 2 and Python 3.\n",
    "print(\"{:,}\".format(X.nbytes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Footprint of X in megabytes (10**6 bytes).\n",
    "float(X.nbytes) / 10**6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Bytes per array element: total bytes divided by the element count.\n",
    "X.nbytes / X.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\jdavila\\Anaconda27\\lib\\site-packages\\sklearn\\linear_model\\stochastic_gradient.py:84: FutureWarning: max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDRegressor'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.\n",
      "  \"and default tol will be 1e-3.\" % type(self), FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,\n",
       "       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',\n",
       "       loss='squared_loss', max_iter=5, n_iter=None, penalty='l2',\n",
       "       power_t=0.25, random_state=None, shuffle=True, tol=None, verbose=0,\n",
       "       warm_start=False)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import SGDRegressor\n",
    "\n",
    "# Set max_iter and tol explicitly: leaving both unset raises a FutureWarning\n",
    "# in scikit-learn 0.19+, and the defaults change in 0.21. The values used\n",
    "# here are the announced 0.21 defaults.\n",
    "sgd = SGDRegressor(max_iter=1000, tol=1e-3, random_state=0)\n",
    "\n",
    "# Boolean mask: each row goes to the training set with probability 0.75,\n",
    "# the rest are held out. A seeded RandomState keeps the split identical\n",
    "# across re-runs.\n",
    "rng = np.random.RandomState(0)\n",
    "train = rng.choice([True, False], size=len(y), p=[.75, .25])\n",
    "sgd.fit(X[train], y[train])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xc4f80f0>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFqRJREFUeJzt3XuMbXV5xvHnORcoYGEOWs6EomzRoLbRjlgvicTuiBds\no9gT0xxp1cHGmFJro9aI9Q+qaQyaVJvQ8IeVMkAkVC1eaFNFq4uEWqoVBlCPlF4O4IWxyLENITkH\nnLd/7D1n7ZnZe/ZlXfda30+yw1lrfmv9Xn577ffseWb2Oo4IAQDm366qCwAA5IOGDgANQUMHgIag\noQNAQ9DQAaAhaOgA0BBjG7rtq22v2b57y/4/sn3I9j22ryiuRADAJPZMMOYaSVdKum5jh+2upNdK\nem5EPGH7KcWUBwCY1Nh36BFxm6QjW3b/gaQrIuKJ/piHC6gNADCFWTP0cyW9zPbttr9u+9fzLAoA\nML1JIpdRx+2LiJfYfqGkT0s6J7+yAADTmrWhPyjpJkmKiG/ZXrf95Ij46daBtrlZDADMICI8zfhJ\nIxf3Hxs+L+nlkmT7XEl7hzXzgaJ4ROjyyy+vvIa6PFgL1oK12Pkxi7Hv0G3fIKkr6cm2H5B0uaS/\nkXSN7XskHZX05plmb5nDhw9XXUJtsBYp1iLFWmQztqFHxMUjvvSmnGsBAGTAJ0VLtLy8XHUJtcFa\npFiLFGuRjWfNaiaewI6i5wCAprGtKOiHoshBkiRVl1AbrEWKtUixFtnQ0AGgIYhcAKCGiFwAoMVo\n6CUiH0yxFinWIsVaZENDB4CGIEMHgBoiQweAFqOhl4h8MDVPa7G42JHtbY/FxU6mc+zefUqmc2aV\nx/9X3ubpuqijWW+fC7TG2tr9krbHhmtrk383POwc6+vu70vUu//ddOfMKo//L9QLGTowhr3ReLd9\nZeLbnA4/x/B9Zb1e8vj/QnHI0AGgxWjoJSIfTLEWg5KqC6gNrotsaOgA0BBk6MAYZOioAhk6ALQY\nDb1E5IMp1mJQUnUBtcF1kc3Yhm77attrtu8e8rX32F63fXox5QEAJjU2Q7d9vqRHJV0XEc8b2H+W\npE9KepakF0TEIyOOJ0PHXCNDRxUKydAj4jZJR4Z86eOS3jvNZACA4syUodt+naQHI+KenOtpNPLB\nFGsxKKm6gNrgushm6nu52D5J0p9KeuXg7twqAgDMZJabcz1DUkfSXe6FcGdJ+rbtF0XET4YdsLy8\nrE6nI0laWFjQ0tKSut2upPRv5DZsd7vdWtXD9uTbqY3tfI4f9u48SZLc6z94cLl/M65ts+1YT9nr\nvbGv6ue7iu0kSbSysiJJx/vltCb6YJHtjqSbI+K5Q77235LOi4hhOTs/FMXca8IPRSefv7gaMJ1C\nfihq+wZJ35B0ru0HbF+yZUiIyGUi29+ttRdrMSipuoDa4LrIZmzkEhEXj/n6OfmVAwCYFfdyAcYg\nckEVuJcLALQYDb1E5IMp1mJQUnUBtcF1kQ0NHQAaggwdGIMMHVUgQweAFqOhl4h8MMVaDEqqLqA2\nuC6yoaEDQEOQoQNjkKGjCmToQMUWFzuyve0BlIGGXiLywVRT16J3R8MY8thJUnBV86Op10VZaOgA\n0BBk6MAY02ToO40lQ8c0yNABoMVo6CUiH0yxFoOSqguoDa6LbGjoANAQZOjAGGToqAIZOgC0GA29\nROSDKdZiUFJ1AbXBdZHNJP9I9NW212zfPbDvo7YP2V61/Xe2Ty22TADAOGMzdNvnS3pU0nUR8bz+\nvldI+lpErNu+QlJExPtHHE+GjrlGho4qFJKhR8Rtko5s2ffViFjvb94u6axpJgUA5C+PDP2tkv4x\nh/M0HvlgirUYlFRdQG1wXWSzJ8vBtj8g6fGIuGGnccvLy+p0OpKkhYUFLS0tqdvtSkqfQLbbtb2h\nLvVMWm/afIePT8d0t4wffnxve3Vgu3
fOsuofvZ3Wksf8k26vrq6WOl+dtpMk0crKiiQd75fTmuj3\n0G2fLenmjQy9v29Z0tskvTwiju5wLBk65hoZOqowS4Y+6Tt09x8bE10o6b2SXrZTMwcAlGeSX1u8\nQdI3JJ1r+wHbl0i6UtKTJH3F9h22ryq4zkbY/u1ve7EWg5KqC6gNrotsxr5Dj4iLh+y+poBaAAAZ\ncC8XYAwydFSBe7kAQIvR0EtEPphiLQYlVRdQG1wX2dDQAaAhyNCBMcjQUQUydABoMRp6icgHU6zF\noKTqAmqD6yIbGjoANAQZOjAGGTqqQIYOTGhxsSPb2x6Li52KKzuxpnVhHtDQS0Q+mKp6LdbW7lfv\n3enmR29/2ZKBPx+tUV3lq/q6mHc0dABoCDJ0tFJeuXgRGXoRuTYZ+vwhQweAFqOhl4h8MMVaDEqq\nLqA2uC6yoaEDQEOQoaOVyNCLmwv5IEMHgBajoZeIfDDFWgxKqi6gNrguspnkH4m+2vaa7bsH9u2z\nfYvte21/2fZpxZYJABhnbIZu+3xJj0q6LiKe19/3EUk/jYiP2n6fpH0RcdmI48nQUTtk6MXNhXwU\nkqFHxG2SjmzZfZGka/t/vlbS66eZFACQv1kz9DMiYk2SIuIhSWfkV1JzkQ+mWItBSdUF1AbXRTZ7\ncjrPjt+fLS8vq9PpSJIWFha0tLSkbrcrKX0C2W7X9oaq50+baXdTTZOO33r+dEx3y/hR8yWSVjfN\nv9Pxg/MtLnaG3rRr3779euSRhyaqf/T29vmGbZ9++qKOHFkbO/+kz8/q6upU45u0nSSJVlZWJOl4\nv5zWRL+HbvtsSTcPZOiHJHUjYs32oqSvR8RzRhxLho7aaUKGnr2u7Bn6qPPyms+uyN9Dd/+x4YuS\nlvt/foukL0wzKQAgf5P82uINkr4h6VzbD9i+RNIVkl5p+15JF/S3Mcb2b3/bi7UYlFRdQG1wXWQz\nNkOPiItHfOkVOdcCAMiAe7mglcjQp5trFDL04nAvFwBoMRp6icgHU6zFoKTqAmqD6yIbGjoANAQZ\nOlqJDH26uUYhQy8OGToAtBgNvUTkgynWYlBSdQG1wXWRDQ0dABqCDB2tRIY+3VyjkKEXZ5YMPa+7\nLQIo1In95gmMRuRSIvLBFGsxKJlgzFH13gkPPpqH6yIbGjoANAQZOlppHjP0/PN2MvQ64/fQAaDF\naOglIh9MsRaDkqoLqA2ui2xo6ADQEGToaCUy9OmOH4UMvThk6ADQYjT0EpEPpliLQUnVBdQG10U2\nmRq67XfZ/o7tu21/yvYJeRUGAJjOzBm67TMl3Sbp2RFxzPbfSvqHiLhuyzgydNQOGfp0x49Chl6c\nKu7lslvSKbbXJZ0s6UcZzwcAmNHMkUtE/EjSX0h6QNIPJf0sIr6aV2FNRD6YYi0GJVUXUBtcF9nM\n/A7d9oKkiySdLel/JX3W9sURccPWscvLy+p0OpKkhYUFLS0tqdvtSkqfQLbbtb2h6vnTZtrdVNOk\n47eePx3T3TJ+1HyJpNVN809//LBtbapvuuP3bruz465dJ2t9/TENl4w9XpL27z9bN9640pttxPOz\nurq649ebvJ0kiVZWViTpeL+cVpYM/Q2SXh0Rb+tvv0nSiyPiHVvGkaGjdsjQ85uriGwe5f8e+gOS\nXmL7F9y7Wi6QdCjD+QAAGWTJ0L8p6bOS7pR0l3p/LX8ip7oaiXwwxVoMSqouoDa4LrLJ9FsuEfFB\nSR/MqRYAQAbcywWtRIae31xk6MXgXi4A0GI09BKRD6ZYi0FJ1QXUBtdFNjR0AGgIMnS0Ehl6fnOR\noReDDB0AWoyGXiLywRRrMSipuoDa4LrIhoYOAA1Bho5WIkPPby4y9GKQoaM1Fhc7sr3tsbjYqbq0\nGjhx27rME57b2dHQS0Q+mMq6Fmtr96v3LnDzo7d/3iQ5n++otq/NfEiSpGHPbblo6ADQEGTomEvT\n5N
pZj5/HDL1+c/X25/3cNBkZOgC0GA29RGToKdZiUFJ1AbXBdZENDR0AGoIMHXOJDD2PsWTodUaG\nDgAtRkMvEflgirUYlFRdQG1wXWSTqaHbPs32Z2wfsv1d2y/OqzAAwHQyZei2VyTdGhHX2N4j6eSI\n+L8tY8jQkTsy9DzGkqHX2SwZ+swN3fapku6MiGeMGUdDR+5o6HmMpaHXWdk/FH26pIdtX2P7Dtuf\nsH1ShvM1HvlgirUYlFRdQG1wXWSzJ+Ox50n6w4j4N9t/KekySZdvHbi8vKxOpyNJWlhY0NLSkrrd\nrqT0CWS7XdsHDhzUkSNrGrR//9m68caViY5PbWx3+//du+3ugvv27dcjjzw04fG9MZPOt7W+dEx3\ny/hR8yWSVjfNP/3xw7a3ji/q+I19447vfX3c9bG6uprp+HneTpJEKysrknS8X04rS+SyX9K/RMQ5\n/e3zJb0vIl67ZRyRC7YZdX/uSa+VaaONomIUIhcil6KUGrlExJqkB22f2991gaTvzXo+AEA2WX8P\n/Z2SPmV7VdKvSfpw9pKai3wQwyVVF1AbvEayyZKhKyLukvTCnGoBAGTAvVxQCTL0afYVNZYMvc64\nlwsAtBgNvUTkgxguqbqA2uA1kg0NHQAaggwdlSBDn2ZfUWPJ0OuMDB0AWoyGXiLyQQyXVF1AbfAa\nyYaGDgANQYaOSpChT7OvqLFk6HVGho5GWlzsyPamR3FOnGKuacYCxaOhl4h8cDZra/er945t8FGU\no1PMNc3YnSQzHtc8vEayoaEDQEOQoaMS02Too8YWlaG3J9cmQ68zMnQAaDEaeonIBzFcUnUBtcFr\nJBsaOgA0BBk6KkGG3ra5evvJ0CdHhg4ALUZDLxH5IIZLqi6gNniNZJO5odveZfsO21/MoyAAwGwy\nZ+i23yXpBZJOjYjXDfk6GTq2IUNv21y9/WTokys9Q7d9lqTflPTJLOcBAGSXNXL5uKT3qtibazQG\n+SCGS6ouoDZ4jWSzZ9YDbf+WpLWIWLXdVe/7rKGWl5fV6XQkSQsLC1paWlK325WUPoFsz9f2wYPL\n/ZtmpXbtOlnr649t2rdv337ddNON245PbWx3tXH3wp3tdPzgvu6Wr2uC+ccfv318Hsev5jj/qPFF\nHb+xb9zxva9vvZ5OP31RR46sabTB47dfH6Our3ncTpJEKysrknS8X05r5gzd9ocl/Z6kJySdJOkX\nJd0UEW/eMo4MvYEmz7WLy8XJ0Odprt7+yZ6HbM9tU8ySoefywSLbvyHpPfxQtD1o6EWNbepcvf00\n9MnxwaKaIx/EcEnVBdRIUnUBc23mDH1QRNwq6dY8zgUAmA33csFMiFyKGtvUuXr7iVwmR+QCAC1G\nQy8RGTqGS6ouoEaSqguYazR0AGgIMnTMhAy9qLFNnau3nwx9cmToANBiNPQSkaFjuKTqAmokqbqA\nuUZDB4CGIEPHTMjQixrb1Ll6+8nQJ0eGDgzVu0vf4APNtrjY2facLy52qi6rcDT0EpGhV+Woeu/s\nBh91klRdQI0kuZyld2vnzc/51ts9NxENHQAaggwdM5m3DL0O+XH96mpuhj7Nv1lbV2ToANBiNPQS\nkaFjuKTqAmokqbqAuUZDB4CGIEPHTMjQm1AXGXqdkaEDQIvN3NBtn2X7a7a/a/se2+/Ms7AmIkPH\ncEnVBdRIUnUBcy3Lvyn6hKR3R8Sq7SdJ+rbtWyLi+znVBgCYQm4Zuu3PS7oyIv5py34y9AYiQ29C\nXWTodVZZhm67I2lJ0r/mcT4AwPQyN/R+3PJZSX8cEY9mL6m5yNAxXFJ1ATWSVF3AXMuSocv2HvWa\n+fUR8YVR45aXl9XpdCRJCwsLWlpaUrfblZQ2Obar315c7Gy7gZF9oiKOarSk/9/uiO29Y+5uOO74\nRJuN2u5u2Vf08Vu/nuX41RznHzW+qOM39o07vquNu15uN3j86pj5
ts+fJMm263ma+fft269HHnlo\n0/GTvj6mOX7U9oEDB3XkyNqmc55yymnb5plEpgzd9nWSHo6Id+8whgx9ThSTa5NV17euZqxBHj+j\nmaRH7XQP/Yx9dMh5e3/xlJah236ppN+V9HLbd9q+w/aFs54PAJDNzA09Iv45InZHxFJEPD8izouI\nL+VZXNOQoWO4pOoCaiSpuoC5xidFAaAhuJcLjiNDr8PYps5VXF1k6CneoQNAQ9DQS0SGjuGSqguo\nkaTqAuYaDR0AGoIMHceRoddhbFPnKq4uMvQU79ABoCFo6CUiQ8dwSdUF1EhSdQFzjYYOAA1Bhl6Q\nY8eO6dJL/0Rraw9v2n/CCXt05ZVX6Mwzz6yostHI0OswtqlzFVcXGXoq090WMdrDDz+s66+/TseO\nXbVp/0knfVy33367Dhw4MPO5R931bf/+s/XQQ4fHjt2162Strz828/xAfYy6g2O2c2R9jYx6jRb9\n2qOhF2jPnpN17NjFA3sS7d371Mzn7V0o298RrK1tv7CHjV1f3+kdEMqXaPPtaNssmXL8UWW/lref\nY/hrZPJzjnqNZj3vOGToANAQNPRSdasuALXUrbqAGulWXcBco6EDQEPQ0EuVVF0AaimpuoAaSaou\nYK7R0AGgIWjopepWXQBqqVt1ATXSrbqAuUZDB4CGyNTQbV9o+/u2/932+/IqqrmSqgtALSVVF1Aj\nSdUFzLWZG7rtXZL+StKrJf2qpDfafnZehTXTatUFoJa4LlKsRRZZ3qG/SNJ9EXF/RDwu6UZJF+VT\nVlP9rOoCUEtcFynWIossDf2XJT04sP2D/j4AQAW4l0tBdu/erWPHjujUU197fN9jj92pY8ee0N69\nl1RYGerncNUF1MjhqguYazPfPtf2SyT9WURc2N++TFJExEe2jGvfvXMBIAfT3j43S0PfLeleSRdI\n+rGkb0p6Y0QcmumEAIBMZo5cIuLntt8h6Rb1sviraeYAUJ3C/8UiAEA5CvmkqO032P6O7Z/bPm9g\n/9m2H7N9R/9x1U7naYJRa9H/2vtt32f7kO1XVVVjVWxfbvsHA9fDhVXXVCY+mJeyfdj2XbbvtP3N\nquspm+2rba/Zvntg3z7bt9i+1/aXbZ827jxFffT/Hkm/LenWIV/7j4g4r/+4tKD562ToWth+jqTf\nkfQcSa+RdJWz/1ta8+hjA9fDl6oupix8MG+bdUndiHh+RLyo6mIqcI1618KgyyR9NSKeJelrkt4/\n7iSFNPSIuDci7tPwf1upVU1rh7W4SNKNEfFERByWdJ96H9Zqm1ZdDwP4YN5mVovvLRURt0k6smX3\nRZKu7f/5WkmvH3eeKhaw0//2+uu2z69g/rrY+sGsH6qdH8x6h+1V25+c5FvKBuGDeZuFpK/Y/pbt\nt1VdTE2cERFrkhQRD0k6Y9wBM/+Wi+2vSNo/uEu9J+UDEXHziMN+JOlpEXGknyd/3vavRMSjs9ZR\nBzOuRSvstDaSrpL0oYgI238u6WOSfr/8KlEDL42IH9v+JfUa+6H+u1akxv4GS5ZfW3zlDMc8rv63\nFRFxh+3/lHSupDtmraMOZlkL9d6RP3Vg+6z+vkaZYm3+WlKb/vL7oaSnDWw38vmfVET8uP/f/7H9\nOfUiqbY39DXb+yNizfaipJ+MO6CMyOV4Rmr7Kf0fBsn2OZKeKem/SqihLgbz4i9KOmj7BNtPV28t\nWvXT/f5FuuGApO9UVUsFviXpmf3f/DpB0kH1ronWsX2y7Sf1/3yKpFepXdfCBmt7j1ju//ktkr4w\n7gSF3MvF9uslXSnpKZL+3vZqRLxG0sskfcj2MfV+qv32iGj07dVGrUVEfM/2pyV9T9Ljki6N9n0o\n4KO2l9S7Fg5Lenu15ZSHD+Ztsl/S5/q3Cdkj6VMRcUvFNZXK9g3q/XNNT7b9gKTLJV0h6TO23yrp\nfvV+K27n87SvhwBAM7X214QA
oGlo6ADQEDR0AGgIGjoANAQNHQAagoYOAA1BQweAhqChA0BD/D8O\n9k9woz1ggwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xb0ed860>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import pandas as pd\n",
    "\n",
    "# Predict on the held-out rows (those not selected by the train mask).\n",
    "y_pred = sgd.predict(X[~train])\n",
    "\n",
    "# Residuals are actual minus predicted values on the held-out rows.\n",
    "residuals = pd.Series(y[~train] - y_pred)\n",
    "\n",
    "# Label the figure so it can be read on its own; the trailing semicolon\n",
    "# suppresses the repr of the last expression.\n",
    "ax = residuals.hist(bins=50)\n",
    "ax.set_title(\"Held-out residuals\")\n",
    "ax.set_xlabel(\"y - y_pred\")\n",
    "ax.set_ylabel(\"count\");"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
